import pandas as pd
import matplotlib.pyplot as plot

# A Series works much like a dictionary: values are looked up by index label.
# When no index is given, pandas numbers the entries starting from 0; here the
# labels are supplied explicitly.
s = pd.Series([1, 2, 3, 4], index=["a", "b", "c", "d"])

print(s)
# Label-based lookup, just like a dictionary key.
print(s["a"])

# Column-oriented input: each key becomes a column, each list its values.
data = {"name": ["张三", "李四", "王五"], "age": [23, 24, 25]}

df = pd.DataFrame(data)
print(df)

# Comparing a column with a scalar yields a boolean Series (one flag per row).
is_over_23 = df["age"] > 23
print(is_over_23)

# Indexing the frame with that boolean Series keeps only the True rows.
print(df[is_over_23])

# Summary statistics (count, mean, std, min, quartiles, max) of numeric columns.
print(df.describe())

# Read the student file. header=None makes pandas treat the first line as
# data, and the column names are taken from `names` instead.
student = pd.read_csv(
    "data/students.txt",
    sep=",",
    encoding="utf-8",
    header=None,
    names=["id", "name", "age", "gender", "clazz"],
)

# Rows whose gender column equals "男" (male).
print(student[student["gender"] == "男"])

print(student["age"])

# Number of students per age, largest count first. Computed once and reused
# for both the printout and the bar chart.
age_counts = student["age"].value_counts().sort_values(ascending=False)
print(age_counts)

# Series.plot draws on the current Axes when a figure already exists, so two
# bare .plot.bar() calls would stack both charts on top of each other. Give
# each chart its own figure/Axes explicitly.
_, age_ax = plot.subplots()
age_counts.plot.bar(ax=age_ax)

_, clazz_ax = plot.subplots()
student["clazz"].value_counts().plot.bar(ax=clazz_ax)

# Call plot.show() here to display the charts; it is left disabled, so the
# figures are built but never shown.

# Students per class: select the id column first so only that column is
# counted, then turn the group labels back into a regular column.
print(student.groupby("clazz")["id"].count().reset_index())

# Read the score file. header=None makes pandas treat the first line as data,
# and the column names are taken from `names` instead.
score = pd.read_csv(
    "data/score.txt",
    sep=",",
    encoding="utf-8",
    header=None,
    names=["id", "c_id", "score"],
)

# Total score per student id. The column is selected *before* aggregating:
# the first parameter of GroupBy.sum() is `numeric_only`, not a column name,
# so passing "score" to sum() does not select that column.
# Computed once and reused for both the printout and the CSV export.
total_score = score.groupby("id")["score"].sum().reset_index()
print(total_score)

# Join scores with student details on the shared "id" column
# (pd.merge defaults to an inner join).
print(pd.merge(score, student, on="id"))

# Write the totals without a header row and without the index column.
# to_csv documents these flags as booleans, so use False rather than None.
total_score.to_csv("data/sumscore.csv", header=False, index=False)

